/*
This code reads files publicly available from CMS.
*/

***************************************************************
* read in 2017 benchmark data to create dichotomization flags *
***************************************************************

import delimited "$data_orig\calculationdata2017\csv\risk2017", varnames(21) case(lower) clear
keep code state county ffsrateexcludingphaseoutime v22 preacaratecategory quartilepercentfactor v37
drop if inlist(state, "Northern Mariana Isl", "American Samoa", "GU", "PR", "VI")
rename code stcty
rename ffsrateexcludingphaseoutime ffsrate17
rename v22 bnchmrk17_preaca
rename quartilepercentfactor quartilepercent17
rename v37 bnchmrk17_aca
destring stcty ffsrate17 bnchmrk17_preaca bnchmrk17_aca, replace ignore(",")
gen bnch_cut = bnchmrk17_preaca - bnchmrk17_aca
gen bnch_cut_prop = bnch_cut/bnchmrk17_preaca * 100
save "$data_deri\ratedata_17", replace

sum bnch_cut_prop, detail
local q4_prop = r(p75)
gen q4_prop_flag = (bnch_cut_prop > `q4_prop')
label variable q4_prop_flag "Above 75th Percentile Proportional Cuts"
label define q4_prop_flag_lbl 0 "Other Counties" 1 "Top 25% Proportional Cuts"
label values q4_prop_flag q4_prop_flag_lbl

sum bnch_cut_prop, detail
local med_prop = r(p50)
gen med_prop_flag = (bnch_cut_prop > `med_prop')
label variable med_prop_flag "Above Median Proportional Cuts"
label define med_prop_flag_lbl 0 "Other Counties" 1 "Top 50% Proportional Cuts"
label values med_prop_flag med_prop_flag_lbl

sum bnch_cut_prop, detail
local q10_prop = r(p90)
gen q10_prop_flag = (bnch_cut_prop > `q10_prop')
label variable q10_prop_flag "Above 90% Proportional Cuts"
label define q10_prop_flag_lbl 0 "Other Counties" 1 "Top 10% Proportional Cuts"
label values q10_prop_flag q10_prop_flag_lbl

keep state county stcty bnch_cut bnch_cut_prop med_prop_flag q4_prop_flag q10_prop_flag
compress
save "$data_deri\dichot_flags", replace


**********************************************
* read in 2017 MA benchmark data for Table 1 *
**********************************************

import delimited "$data_orig\calculationdata2017\csv\risk2017", varnames(21) case(lower) clear
keep code state county v22 v37
tab state
drop if inlist(state, "GU", "PR", "VI", "Northern Mariana Isl", "American Samoa")
duplicates example state
rename state statecode
rename code stcty
rename v22 bnchmrk17_preaca
rename v37 bnchmrk17_aca
destring stcty, replace

destring stcty bnchmrk17_preaca bnchmrk17_aca, replace ignore(",")
gen bnch_diff = bnchmrk17_aca - bnchmrk17_preaca
gen bnch_diff_prop = bnch_diff/bnchmrk17_preaca * 100

sum bnch_diff_prop, detail
local q4_prop = r(p25)
count if bnch_diff_prop == r(p25)
gen q4_prop_flag_rev = (bnch_diff_prop >= `q4_prop')
label variable q4_prop_flag_rev "Above Q4 Proportional Cuts"
label define q4_prop_flag_rev_lab 1 "Counties with Smaller Cuts" 0 "Counties with the Largest Cuts"
label values q4_prop_flag_rev q4_prop_flag_rev_lab

compress
save "$data_deri\tbl1_ratedata_17", replace


****************************************
* read in 2008-2019 MA enrollment data *
****************************************

forvalues yr = 2008/2016 {
	import delimited "$data_orig\State_County_Penetration_MA_`yr'_06", varnames(1) case(lower) clear
	keep statename countyname ssa eligibles enrolled penetration
	drop if inlist(statename, "Pending State Designation", "Northern Mariana Islands", "American Samoa", "Guam", "Puerto Rico", "Virgin Islands")
	drop if countyname=="Pending County Designation"
	rename ssa stcty
	gen year = `yr'
	save "$data_deri\enroll_`yr'", replace
	
	if `yr'==2011 {
		rename eligibles eligibles11
		rename enrolled enrolled11
		rename penetration penetration11
		keep stcty eligibles11 enrolled11 penetration11
		save "$data_deri\pop_weights", replace
	}
}

forvalues yr = 2017/2019 {
	import delimited "$data_orig\State_County_Penetration_MA_`yr'_06", varnames(1) case(lower) clear
	keep statename countyname ssa eligibles enrolled penetration
	drop if inlist(statename, "PUERTO RICO", "VIRGIN ISLANDS")
	drop if countyname=="Pending County Designation"
	rename ssa stcty
	gen year = `yr'
	save "$data_deri\enroll_`yr'", replace	
}


********************************************************
* read in 2011 and 2019 MA enrollment data for Table 1 *
********************************************************

import delimited "$data_orig\State_County_Penetration_MA_2011_06", varnames(1) case(lower) clear
keep statename countyname ssa eligibles enrolled penetration
tab statename
drop if inlist(statename, "Pending State Designation", "Northern Mariana Islands", "American Samoa", "Guam", "Puerto Rico", "Virgin Islands")
duplicates example statename
rename ssa stcty
rename eligibles eligibles11
rename enrolled enrolled11
rename penetration penetration11
save "$data_deri\tbl1_enroll_11", replace

import delimited "$data_orig\State_County_Penetration_MA_2019_06", varnames(1) case(lower) clear
keep statename countyname ssa eligibles enrolled penetration
tab statename
drop if inlist(statename, "PUERTO RICO", "VIRGIN ISLANDS")
duplicates example statename
rename ssa stcty
rename eligibles eligibles19
rename enrolled enrolled19
rename penetration penetration19
save "$data_deri\tbl1_enroll_19", replace



**************************************************
* read in MA county-plan type-level payment data *
**************************************************

forvalues i = 2008/2010 {
	import excel "$data_orig/`i'paymentdata/`i'PartCCountyLevel", cellrange(A3) firstrow case(lower) clear
	keep statecountycode state countyname plantype snpplantype averageriskscore averageabpmpmpayment averagerebatepmpmpayment
	rename statecountycode stcty
	destring stcty, replace
	gen year = `i'
	save "$data_deri\plantype_pay_`i'", replace
}

forvalues i = 2011/2011 {
	import excel "$data_orig/`i'paymentdata/`i'PartCCountyLevel", cellrange(A2) firstrow case(lower) clear
	keep statecountycode state countyname plantype snpplantype averageriskscore averageabpmpmpayment averagerebatepmpmpayment
	rename statecountycode stcty
	destring stcty, replace
	gen year = `i'
	save "$data_deri\plantype_pay_`i'", replace
}

forvalues i = 2012/2013 {
	import excel "$data_orig/`i'paymentdata/`i'PartCCountyLevel", cellrange(A3) firstrow case(lower) clear
	keep countycode stateabbreviation countyname plantype snpplantype averageriskscore averageabpmpmpayment averagerebatepmpmpayment
	rename countycode stcty
	destring stcty, replace
	rename stateabbreviation state
	gen year = `i'
	save "$data_deri\plantype_pay_`i'", replace
}

forvalues i = 2014/2014 {
	import excel "$data_orig/`i'paymentdata/`i'PartCCountyLevel", cellrange(A3) firstrow case(lower) clear
	keep countycode state countyname plantype snpplantype averagepartriskscore averageabpmpmpayment averagerebatepmpmpayment
	rename countycode stcty
	destring stcty, replace
	rename stateabbreviation state
	rename averagepartriskscore averageriskscore
	gen year = `i'
	save "$data_deri\plantype_pay_`i'", replace
}

forvalues i = 2015/2019 {
	import excel "$data_orig/`i'paymentdata/`i'PartCCountyLevel", cellrange(A3) firstrow case(lower) clear
	keep countycode state countyname plantype snpplantype averagepartcriskscore averageabpmpmpayment averagerebatepmpmpayment
	rename countycode stcty
	destring stcty, replace
	rename stateabbreviation state
	rename averagepartcriskscore averageriskscore
	gen year = `i'
	save "$data_deri\plantype_pay_`i'", replace
}


*********************************************
* calculate MA plan type enrollment weights *
*********************************************

import excel "$data_orig\enrl_fac.xlsx", firstrow clear //See eMethods for explanation of how plan type enrollment ratios were determined
keep plan_type factor
tempfile enrl_fac
save `enrl_fac'

use "$data_deri\plantype_pay_2008", clear
forvalues i = 2009/2019 {
	append using "$data_deri\plantype_pay_`i'"
}
drop if inlist(state, "0", "GU", "PR", "VI", "")

gen plan_type = plantype + " " + snpplantype
replace plan_type = strtrim(plan_type)

keep stcty plan_type
duplicates drop

merge m:1 stcty using "$data_deri\pop_weights", gen(weight_merge)
list stcty if weight_merge==2
keep if weight_merge==3

merge m:1 plan_type using `enrl_fac'
drop _merge

destring eligibles11 , replace ignore(",")
destring enrolled11, replace ignore("," "*")

bysort stcty: egen total = total(factor)
gen imp_enrl = enrolled11*factor/total

keep stcty plan_type eligibles11 imp_enrl

tempfile plan_type_weight
save `plan_type_weight'


***********************************************************************
* read in 2011 and 2019 MA county-plan type-level payment for table 1 *
***********************************************************************

import excel "$data_orig/2011paymentdata/2011PartCCountyLevel", cellrange(A2) firstrow case(lower) clear
keep statecountycode state plantype snpplantype averageriskscore averageabpmpmpayment averagerebatepmpmpayment
rename statecountycode stcty
destring stcty, replace
drop if inlist(state, "PR", "VI", "")
gen tot_pay_ra_11 = averageriskscore * averageabpmpmpayment + averagerebatepmpmpayment
gen plan_type = plantype + " " + snpplantype
replace plan_type = strtrim(plan_type)
merge m:1 stcty plan_type using `plan_type_weight'
keep if _merge==3
drop _merge
collapse tot_pay_ra_11 [aweight = imp_enrl], by(stcty)
summarize tot_pay_ra
save "$data_deri\tbl1_pay_11", replace


import excel "$data_orig/2019paymentdata/2019PartCCountyLevel", cellrange(A3) firstrow case(lower) clear
keep countycode stateabbreviation plantype snpplantype averagepartcriskscore averageabpmpmpayment averagerebatepmpmpayment
rename countycode stcty
destring stcty, replace
rename averagepartcriskscore averageriskscore
drop if inlist(stateabbreviation, "0", "GU", "PR", "VI")
gen tot_pay_ra_19 = averageriskscore * averageabpmpmpayment + averagerebatepmpmpayment
gen plan_type = plantype + " " + snpplantype
replace plan_type = strtrim(plan_type)
merge m:1 stcty plan_type using `plan_type_weight'
keep if _merge==3
drop _merge
collapse tot_pay_ra_19 [aweight = imp_enrl], by(stcty)
summarize tot_pay_ra
save "$data_deri\tbl1_pay_19", replace


*****************************
* Census region for table 1 *
*****************************

import excel "$data_orig\census_regions.xlsx", sheet("Sheet1") firstrow case(lower) clear
gen northeast = (region == "Northeast")
gen midwest = (region == "Midwest")
gen south = (region == "South")
gen west = (region == "West")
save "$data_deri\region_xwalk", replace


******************************
* Create MA enrollment panel *
******************************

use "$data_deri\enroll_2008", clear
forvalues yr = 2009/2019 {
	append using "$data_deri\enroll_`yr'"
}
sort stcty year
merge m:1 stcty using "$data_deri\pop_weights", gen(weight_merge)
list statename countyname stcty year if weight_merge==1
merge m:1 stcty using "$data_deri\dichot_flags", gen(bnch_merge)
list statename countyname stcty year if bnch_merge==1
list state county stcty if bnch_merge==2

keep if weight_merge==3 & bnch_merge==3

destring eligibles*, replace ignore(",")
destring enrolled*, replace ignore("," "*")
destring penetration*, replace ignore("%")

replace penetration=0 if penetration==. //960 county-years penetration suppressed due to <10 enrollees, changed to 0% penetration

drop if penetration>100 //one county-year dropped due to implausible MA penetration rate

gen phaseIn = (year>=2012 & year<=2016)
gen post = (year>=2017)
gen post_alt = (year>=2012)
gen pre_trend = (year-2011)*q4_prop_flag

gen q4_yr12 = q4_prop_flag * (year==2012)
gen q4_yr13 = q4_prop_flag * (year==2013)
gen q4_yr14 = q4_prop_flag * (year==2014)
gen q4_yr15 = q4_prop_flag * (year==2015)
gen q4_yr16 = q4_prop_flag * (year==2016)
gen q4_yr17 = q4_prop_flag * (year==2017)
gen q4_yr18 = q4_prop_flag * (year==2018)
gen q4_yr19 = q4_prop_flag * (year==2019)

label variable stcty "Counties"
label variable pre_trend "Pre-Trend"

xtset stcty year

compress
save "$data_deri\enroll_panel", replace


********************************
* Create MA plan payment panel *
********************************

use "$data_deri\plantype_pay_2008", clear
forvalues i = 2009/2019 {
	append using "$data_deri\plantype_pay_`i'"
}
drop if inlist(state, "0", "GU", "PR", "VI", "")
gen payment_ra = averageriskscore * averageabpmpmpayment
gen tot_payment_ra = averageriskscore * averageabpmpmpayment + averagerebatepmpmpayment

gen plan_type = plantype + " " + snpplantype
replace plan_type = strtrim(plan_type)

merge m:1 stcty state using "$data_deri\dichot_flags", keepusing(county med_prop_flag q4_prop_flag q10_prop_flag bnch_cut_prop) //45 counties with benchmark info do not have any payment information
list stcty state county if _merge==2
keep if _merge==3
drop _merge

merge m:1 stcty plan_type using `plan_type_weight'
drop _merge

gen phaseIn = (year>=2012 & year<=2016)
gen post = (year>=2017)
gen post_alt = (year>=2012)
gen pre_trend = (year-2011)*q4_prop_flag

gen q4_yr12 = q4_prop_flag * (year==2012)
gen q4_yr13 = q4_prop_flag * (year==2013)
gen q4_yr14 = q4_prop_flag * (year==2014)
gen q4_yr15 = q4_prop_flag * (year==2015)
gen q4_yr16 = q4_prop_flag * (year==2016)
gen q4_yr17 = q4_prop_flag * (year==2017)
gen q4_yr18 = q4_prop_flag * (year==2018)
gen q4_yr19 = q4_prop_flag * (year==2019)

label variable stcty "Counties"
label variable pre_trend "Pre-Trend"

egen panelid = group(stcty plan_type), missing

xtset panelid year

compress
save "$data_deri\payment_panel", replace
